Import Libraries

¶
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objs as go
%matplotlib inline
In [4]:
#read all datasets
match_data = pd.read_csv('FIFA_WC_2022_MATCH_data.csv',encoding='latin1')
#player data
player_time = pd.read_csv('player_playingtime1.csv')
player_def =pd.read_csv('player_defense1.csv')
player_keepers = pd.read_csv('player_keepers1.csv')
player_shooting = pd.read_csv('player_shooting1.csv')
player_stats = pd.read_csv('player_stats1.csv')
player_misc = pd.read_csv('player_misc1.csv')

Data preprocessing

In [5]:
match_data.head(3)
Out[5]:
match_no day_of_week date hour venue referee group 1 2 attendance ... 1_panelties_scored 2_panelties_scored 1_goal_prevented 2_goal_prevented 1_own_goal 2_own_goal 1_forced_turnovers 2_forced_turnovers 1_defensive_pressure_applied 2_defensive_pressure_applied
0 1 Sun 20-Nov-22 17:00 Al Bayt Stadium Daniele Orsato Group A QATAR ECUADOR 67372 ... 0 1 6 5 0 0 52 72 256 279
1 2 Mon 21-Nov-22 14:00 Khalifa International Stadium Raphael Claus Group B ENGLAND IRAN 45334 ... 0 1 8 13 0 0 63 72 139 416
2 3 Mon 21-Nov-22 17:00 Al Thumama Stadium Wilton Sampaio Group A SENEGAL NETHERLANDS 41721 ... 0 0 9 15 0 0 63 73 263 251

3 rows × 59 columns

In [6]:
match_data.shape
Out[6]:
(62, 59)
In [7]:
#rename columns to be clear
match_data.rename(columns={'1':'home_team',
                           '2':'away_team',
                           '1_panelties_scored':'homeg_panelties',
                           '2_panelties_scored':'awayg_panelties',
                           '1_attempts':'home_attempts',
                           '1_goals':'home_goals',
                           '2_goals':'away_goals',
                           '1_conceded':'home_conceded',
                           '2_conceded':'away_conceded',
                           '1_yellow_cards':'home_yellow_cards',
                           '1_goal_prevented':'home_goal_prevented',
                           '2_goal_prevented':'away_goal_prevented',
                           '1_own_goal':'home_owngoals',
                           '2_own_goal':'away_owngoals',
                           '1_forced_turnovers':'home_forced_turnovers',
                           '2_forced_turnovers':'away_forced_turnovers',
                           '1_defensive_pressure_applied':'home_pressure',
                           '2_defensive_pressure_applied':'away_pressure'
                           
                          },inplace=True)
In [8]:
match_data.rename(
    columns={
        '1_passes':'home_passes',
        '2_passes':'away_passes',
        '1_free_kicks':'home_free_kicks',
        '2_free_kicks':'away_free_kicks',
        '2_yellow_cards':'away_yellow_cards',
        '1_red_cards':'home_RedCards',
        '2_red_cards':'away_RedCards',
        '1_xg':'homeXG',
        '2_xg':'awayXG',
            },inplace=True)
In [9]:
match_data.rename(columns={'1_goal_inside_penalty_area':'homeG_inside',
                           '2_goal_inside_penalty_area':'awayG_inside',
                          '1_goal_outside_penalty_area':'homeG_outside',
                          '2_goal_outside_penalty_area':'awayG_outside',
                          '1_poss':'home_poss',
                          '2_poss':'away_poss',
                          '1_offside':'home_offside',
                          '2_offside':'away_offside',
                          '1_corners':'home_corners',
                          '2_corners':'away_corners',
                          '2_attempts':'away_attempts',
                          '1_ontarget':'home_ontarget',
                          '2_ontarget':'away_ontarget',
                          '1_offtarget':'home_offtarget',
                          '2_offtarget':'away_offtarget',
                          'faul_against_1':'home_fauls',
                           'faul_against_2':'away_fauls',
                           '1_passes_completed':'home_completed_passes',
                           '2_passes_completed':'away_completed_passes'
                           
                          },inplace=True)
In [10]:
match_data.columns
Out[10]:
Index(['match_no', 'day_of_week', 'date', 'hour', 'venue', 'referee', 'group',
       'home_team', 'away_team', 'attendance', 'homeXG', 'awayXG', 'home_poss',
       'away_poss', 'home_goals', 'away_goals', 'score', 'home_attempts',
       'away_attempts', 'home_conceded', 'away_conceded', 'homeG_inside',
       'awayG_inside', 'homeG_outside', 'awayG_outside', 'home_ontarget',
       'away_ontarget', 'home_offtarget', 'away_offtarget',
       '1_attempts_inside_penalty_area', '2_attempts_inside_penalty_area',
       '1_attempts_outside_penalty_area', '2_attempts_outside_penalty_area',
       'home_yellow_cards', 'away_yellow_cards', 'home_RedCards',
       'away_RedCards', 'home_fauls', 'away_fauls', '1_offsides', '2_offsides',
       'home_passes', 'away_passes', '1_passes_compeletd',
       '2_passes_compeletd', 'home_corners', 'away_corners', 'home_free_kicks',
       'away_free_kicks', 'homeg_panelties', 'awayg_panelties',
       'home_goal_prevented', 'away_goal_prevented', 'home_owngoals',
       'away_owngoals', 'home_forced_turnovers', 'away_forced_turnovers',
       'home_pressure', 'away_pressure'],
      dtype='object')
In [11]:
match_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62 entries, 0 to 61
Data columns (total 59 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   match_no                         62 non-null     int64  
 1   day_of_week                      62 non-null     object 
 2   date                             62 non-null     object 
 3   hour                             62 non-null     object 
 4   venue                            62 non-null     object 
 5   referee                          62 non-null     object 
 6   group                            62 non-null     object 
 7   home_team                        62 non-null     object 
 8   away_team                        62 non-null     object 
 9   attendance                       62 non-null     int64  
 10  homeXG                           62 non-null     float64
 11  awayXG                           62 non-null     float64
 12  home_poss                        62 non-null     int64  
 13  away_poss                        62 non-null     int64  
 14  home_goals                       62 non-null     int64  
 15  away_goals                       62 non-null     int64  
 16  score                            62 non-null     object 
 17  home_attempts                    62 non-null     int64  
 18  away_attempts                    62 non-null     int64  
 19  home_conceded                    62 non-null     int64  
 20  away_conceded                    62 non-null     int64  
 21  homeG_inside                     62 non-null     int64  
 22  awayG_inside                     62 non-null     int64  
 23  homeG_outside                    62 non-null     int64  
 24  awayG_outside                    62 non-null     int64  
 25  home_ontarget                    62 non-null     int64  
 26  away_ontarget                    62 non-null     int64  
 27  home_offtarget                   62 non-null     int64  
 28  away_offtarget                   62 non-null     int64  
 29  1_attempts_inside_penalty_area   62 non-null     int64  
 30  2_attempts_inside_penalty_area   62 non-null     int64  
 31  1_attempts_outside_penalty_area  62 non-null     int64  
 32  2_attempts_outside_penalty_area  62 non-null     int64  
 33  home_yellow_cards                62 non-null     int64  
 34  away_yellow_cards                62 non-null     int64  
 35  home_RedCards                    62 non-null     int64  
 36  away_RedCards                    62 non-null     int64  
 37  home_fauls                       62 non-null     int64  
 38  away_fauls                       62 non-null     int64  
 39  1_offsides                       62 non-null     int64  
 40  2_offsides                       62 non-null     int64  
 41  home_passes                      62 non-null     int64  
 42  away_passes                      62 non-null     int64  
 43  1_passes_compeletd               62 non-null     int64  
 44  2_passes_compeletd               62 non-null     int64  
 45  home_corners                     62 non-null     int64  
 46  away_corners                     62 non-null     int64  
 47  home_free_kicks                  62 non-null     int64  
 48  away_free_kicks                  62 non-null     int64  
 49  homeg_panelties                  62 non-null     int64  
 50  awayg_panelties                  62 non-null     int64  
 51  home_goal_prevented              62 non-null     int64  
 52  away_goal_prevented              62 non-null     int64  
 53  home_owngoals                    62 non-null     int64  
 54  away_owngoals                    62 non-null     int64  
 55  home_forced_turnovers            62 non-null     int64  
 56  away_forced_turnovers            62 non-null     int64  
 57  home_pressure                    62 non-null     int64  
 58  away_pressure                    62 non-null     int64  
dtypes: float64(2), int64(48), object(9)
memory usage: 28.7+ KB
In [12]:
match_data['date'] = pd.to_datetime(match_data['date'])
match_data['hour'] = pd.to_datetime(match_data['hour']).dt.hour
In [13]:
match_data[['date','hour']].dtypes
Out[13]:
date    datetime64[ns]
hour             int64
dtype: object

handle players_data

In [14]:
player_time['age'].head(2)
Out[14]:
0    30-067
1    32-094
Name: age, dtype: object
In [15]:
player_time.dtypes
Out[15]:
player                  object
position                object
team                    object
age                     object
birth_year               int64
games                    int64
minutes                float64
minutes_per_game       float64
minutes_pct            float64
minutes_90s            float64
games_starts             int64
minutes_per_start      float64
games_complete           int64
games_subs               int64
minutes_per_sub        float64
unused_subs              int64
points_per_game        float64
on_goals_for           float64
on_goals_against       float64
plus_minus             float64
plus_minus_per90       float64
plus_minus_wowy        float64
on_xg_for              float64
on_xg_against          float64
xg_plus_minus          float64
xg_plus_minus_per90    float64
xg_plus_minus_wowy     float64
dtype: object
In [16]:
player_time.columns
Out[16]:
Index(['player', 'position', 'team', 'age', 'birth_year', 'games', 'minutes',
       'minutes_per_game', 'minutes_pct', 'minutes_90s', 'games_starts',
       'minutes_per_start', 'games_complete', 'games_subs', 'minutes_per_sub',
       'unused_subs', 'points_per_game', 'on_goals_for', 'on_goals_against',
       'plus_minus', 'plus_minus_per90', 'plus_minus_wowy', 'on_xg_for',
       'on_xg_against', 'xg_plus_minus', 'xg_plus_minus_per90',
       'xg_plus_minus_wowy'],
      dtype='object')
In [ ]:
 
In [17]:
player_time[['age','un']]=player_time.age.str.split(pat='-',expand=True) 
player_def[['age','un']]=player_def.age.str.split(pat='-',expand=True) 
player_keepers[['age','un']]=player_keepers.age.str.split(pat='-',expand=True)
player_misc[['age','un']]=player_misc.age.str.split(pat='-',expand=True)
player_stats[['age','un']]=player_stats.age.str.split(pat='-',expand=True)
player_shooting[['age','un']]=player_shooting.age.str.split(pat='-',expand=True)
In [18]:
player_def.drop(['un'],axis=1,inplace=True)
player_keepers.drop(['un'],axis=1,inplace=True)
player_shooting.drop(['un'],axis=1,inplace=True)
player_stats.drop([ 'un'],axis=1,inplace=True)
player_misc.drop(['un'],axis=1,inplace=True)
In [19]:
l1 =player_time.columns
l2 =player_shooting.columns
l3=player_misc.columns
l5=player_keepers.columns
l6=player_stats.columns
In [20]:
print(len(l1),len(l2),len(l3),len(l5),len(l6))
28 23 22 25 31
In [21]:
player_def.age=player_time.age.astype(int)
player_keepers.age=player_time.age.astype(int)
player_misc.age=player_time.age.astype(int)
player_shooting.age=player_time.age.astype(int)
player_stats.age=player_time.age.astype(int)
In [ ]:
 

Visualization

  1. Histogram
  2. Barplot
  3. Piechart
  4. TreeMap
  5. Heatmap
  6. ScatterPlot

Histogram

  • Using to show Distribution of numerical attributes
In [22]:
fig = match_data.hist(figsize=(20,20))
In [23]:
px.histogram(data_frame=player_shooting,x=player_shooting.goals,color='team',title='count of goals by teams')
In [24]:
groups= player_misc.groupby(['team','player','position']).sum()[['aerials_lost']]
groups.reset_index(inplace=True)
mo=groups.where(groups.team=='Morocco').dropna()
In [25]:
px.histogram(data_frame=mo,x=mo.player,y=mo.aerials_lost,color=mo.position)
In [26]:
sns.histplot(data=match_data,x=match_data.home_goals,legend='home_team')
Out[26]:
<AxesSubplot:xlabel='home_goals', ylabel='Count'>

BarPlot

this barplot shows every team with players postions mainly four different positions are found commonly in every team

In [27]:
figure = px.bar(player_stats,x='team',color='position',barmode='group',title='Group Players by team and Postion')

figure.show()
In [28]:
#interactive
match_data['goals_total'] = match_data.home_goals+match_data.away_goals
Groups_total_goals = match_data.groupby(by='group').sum()[['goals_total']]
Groups_total_goals.reset_index(inplace=True)
Groups_total_goals
Out[28]:
group goals_total
0 Group A 15
1 Group B 16
2 Group C 12
3 Group D 11
4 Group E 22
5 Group F 11
6 Group G 16
7 Group H 17
8 Quarter-final 10
9 Round of 16 28
10 Semi-Final 5
In [29]:
px.bar(data_frame=Groups_total_goals,x=Groups_total_goals.group,
       y=Groups_total_goals.goals_total,
      title='Total Goals Scored in each round')

semi-final match represrntation

In [30]:
semi_final_data = match_data.where(match_data.group=='Semi-Final').dropna()
sem = semi_final_data[['home_team','away_team','home_goals','away_goals','home_poss','away_poss']]
sem
Out[30]:
home_team away_team home_goals away_goals home_poss away_poss
60 Argentina Croatia 3.0 0.0 40.0 60.0
61 France Morocco 2.0 0.0 39.0 61.0
In [36]:
z = make_subplots(rows=1,cols=2,horizontal_spacing=0.2)
In [37]:
z.add_trace(go.Bar(y=sem['home_team'],x=semi_final_data['home_goals'],orientation='h'))
z.add_trace(go.Bar(y=sem['away_team'],x=sem['away_goals'],orientation='h'),col=2,row=1)
In [33]:
passes=match_data[['home_team','away_team','home_poss','away_poss']]
passes=passes[:10]


i = make_subplots(rows=1,cols=2,row_heights=[10],column_widths=[10,10],horizontal_spacing=0.2)
In [34]:
i.add_trace(go.Bar(x=passes['home_poss'],y=passes['home_team'],orientation='h',name='Home_team',text=passes['home_poss']),row=1,col=1)
i.add_trace(go.Bar(x=passes['away_poss'],y=passes['away_team'],orientation='h',name='Away_team',text=passes['away_poss']),row=1,col=2)
In [38]:
team_goals=match_data.groupby(by=('home_team')).sum()[['home_goals','home_attempts']].reset_index()
team_goals_sorted = team_goals.sort_values('home_goals')
In [39]:
fig1 =make_subplots(rows=2,cols=2,vertical_spacing=0.5)
import plotly.graph_objects as go

fig1.add_trace(
    go.Bar(x=team_goals.home_team,y=team_goals.home_goals,name='un sorted'),
    row=1,
    col=1)

fig1.add_trace(
    go.Bar(x=team_goals_sorted.home_team,y=team_goals_sorted.home_goals,name='sorted'),
    row=1,
    col=2
)

fig1.add_trace(
    go.Bar(x=mo.player,y=mo.aerials_lost,name='Number of losses aerlies'),
    row=2,
    col=1
)
fig1.show()
In [64]:
team1_cards=match_data.groupby(by=['home_team']).sum()[['home_RedCards','home_yellow_cards']]
team1_cards.reset_index(inplace=True)
team1_cards.sort_values(by='home_yellow_cards',inplace=True)
In [65]:
team1=home_yellow.melt('home_team',var_name='cols',value_name='vals')
In [66]:
px.bar(team1,x=team1['home_team'],
             y=team1['vals'] , color='cols',
             barmode='group' , text_auto=True,title='number of yellow and red cards')

Scatter plot

In [43]:
#not interactive
sum_atte=match_data.groupby(['venue','match_no']).sum()[['attendance']].reset_index()
sum_atte=sum_atte.sort_values('attendance',ascending=False)
In [44]:
sum_atte
Out[44]:
venue match_no attendance
54 Lusail Iconic Stadium 61 88966
48 Lusail Iconic Stadium 24 88966
49 Lusail Iconic Stadium 32 88668
53 Lusail Iconic Stadium 58 88235
47 Lusail Iconic Stadium 16 88103
... ... ... ...
1 Ahmed bin Ali Stadium 12 40432
24 Al Thumama Stadium 11 40013
19 Al Janoub Stadium 29 39789
55 Stadium 974 7 39369
17 Al Janoub Stadium 13 39089

62 rows × 3 columns

In [45]:
fig=plt.scatter(x=sum_atte.match_no,y=sum_atte.attendance)
In [46]:
px.scatter(x=sum_atte.match_no,y=sum_atte.attendance,color=sum_atte.venue)
In [47]:
species=['homeXG','awayXG','home_goals','away_goals']
matrix = pd.plotting.scatter_matrix(match_data[species],figsize=(12,12))
In [48]:
gr=px.scatter(x=match_data.away_attempts,y=match_data.away_ontarget,
              title='correlation between #attempts and #balls on target')
In [49]:
gr.show()
In [50]:
px.scatter(data_frame=mo,x=mo.player,
           y=mo.aerials_lost,
           color=mo.position,size=mo.aerials_lost,
           facet_row='position',
           title='Number of Aerlis lost by morroc')

Line Plot

Number of Shots Trace for teams final

In [67]:
players=player_shooting.groupby(['team','player']).sum()[['shots','goals']].reset_index()

Ar=players.where(players.team=='Argentina').dropna()
Fr=players.where(players.team=='France').dropna()
In [68]:
gu = make_subplots(rows=1,cols=2,horizontal_spacing=0.1,vertical_spacing=0.5)
In [69]:
gu.add_trace(go.Line(x=Ar.player,y=Ar.shots,name='Argentina Trace'),row=1,col=1)

gu.add_trace(go.Line(x=Fr.player,y=Fr.shots,name='France Trace'),row=1,col=2)
C:\Users\Ibrahim_Refa3i\anaconda3\lib\site-packages\plotly\graph_objs\_deprecations.py:378: DeprecationWarning:

plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.


Dual-axis Line plot to see the changes about home_goals and away_goals

In [70]:
fig, ax = plt.subplots(figsize=(12,5))
ax2 = ax.twinx()
ax.set_title('Ghanges of goals')
ax.set_xlabel('Year')
ax.plot(match_data['date'], match_data['home_goals'], color='green', marker='x')
ax2.plot(match_data['date'], match_data['away_goals'], color='red', marker='o')
ax.set_ylabel('goals_team1')
ax2.set_ylabel('')
ax.legend(['home_goals'])
ax2.legend(['away_goals'], loc='upper center')
plt.show()
In [71]:
plt.figure(figsize=(8,8))
fi=sns.lineplot(data=match_data,x='date',y='goals_total')

some data prepration

In [72]:
matche_changes = match_data.groupby(['home_team']).sum()[['home_goals']]
match_changest = match_data.groupby(['away_team']).sum()[['away_goals']]
In [73]:
match_changest.reset_index()
matche_changes.reset_index()

li=matche_changes.home_goals
In [74]:
li2=match_changest.away_goals

li3=li+li2

team_goals=pd.DataFrame(li3).reset_index()
In [75]:
red_home = match_data.groupby('home_team').sum()[['home_RedCards']].reset_index()
red_away = match_data.groupby('away_team').sum()[['away_RedCards']].reset_index()
In [76]:
away_yellow=match_data.groupby('away_team').sum()[['away_yellow_cards']].reset_index()
home_yellow=match_data.groupby('home_team').sum()[['home_yellow_cards']].reset_index()
In [77]:
home_yellow['total']=home_yellow['home_yellow_cards']+away_yellow['away_yellow_cards']
home_yellow['red']=red_home['home_RedCards']+red_away['away_RedCards']
In [78]:
home_yellow.drop(['home_yellow_cards'],axis=1,inplace=True)
In [79]:
team_goals
Out[79]:
index 0
0 ARGENTINA 9.0
1 AUSTRALIA 4.0
2 Argentina NaN
3 BELGIUM 1.0
4 BRAZIL 8.0
5 CAMEROON 4.0
6 CANADA 2.0
7 COSTA RICA 3.0
8 CROATIA 6.0
9 Croatia NaN
10 DENMARK 1.0
11 ECUADOR 4.0
12 ENGLAND 13.0
13 FRANCE 11.0
14 France NaN
15 GERMANY 6.0
16 GHANA 5.0
17 IRAN 4.0
18 JAPAN 5.0
19 KOREA REPUBLIC 5.0
20 MEXICO 2.0
21 MOROCCO 5.0
22 Morocco NaN
23 NETHERLANDS 10.0
24 POLAND 3.0
25 PORTUGAL 12.0
26 QATAR 1.0
27 SAUDI ARABIA 3.0
28 SENEGAL 5.0
29 SERBIA 5.0
30 SPAIN 9.0
31 SWITZERLAND 5.0
32 TUNISIA 1.0
33 UNITED STATES 3.0
34 URUGUAY 2.0
35 WALES 1.0

Boxplot

In [80]:
px.box(team_goals,team_goals[0],title='Boxplot for each team goals')
In [81]:
px.box(data_frame=player_keepers,y=player_keepers.gk_saves,color=player_keepers.team,title='GK_saves')
In [82]:
match_data[['home_goals','away_goals']].boxplot()
Out[82]:
<AxesSubplot:>
In [83]:
team_palyer=player_stats.groupby(['team','player']).sum()[['goals']].reset_index()
In [84]:
team_palyer.sort_values('goals',ascending=False,inplace=True)
In [85]:
team_palyer = team_palyer[:10]

pie Chart

In [86]:
px.pie(data_frame=team_palyer,
       names=team_palyer.player,
       values=team_palyer.goals,
       color=team_palyer.team,hole=0.1,
       hover_name='player')
In [87]:
fi = px.pie(data_frame=sum_atte, values=sum_atte.attendance, names= sum_atte.venue,color=sum_atte.match_no,hole=0.5,
           title='Venue with Total Attendence')
fi.show()

TreeMap

In [88]:
f=px.treemap(data_frame=team_goals,path=['home_team'],values=team_goals[0],title='Home Team Goals')
f.show()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Input In [88], in <cell line: 1>()
----> 1 f=px.treemap(data_frame=team_goals,path=['home_team'],values=team_goals[0],title='Home Team Goals')
      2 f.show()

File ~\anaconda3\lib\site-packages\plotly\express\_chart_types.py:1571, in treemap(data_frame, names, values, parents, ids, path, color, color_continuous_scale, range_color, color_continuous_midpoint, color_discrete_sequence, color_discrete_map, hover_name, hover_data, custom_data, labels, title, template, width, height, branchvalues, maxdepth)
   1569 if path is not None and branchvalues is None:
   1570     branchvalues = "total"
-> 1571 return make_figure(
   1572     args=locals(),
   1573     constructor=go.Treemap,
   1574     trace_patch=dict(branchvalues=branchvalues, maxdepth=maxdepth),
   1575     layout_patch=layout_patch,
   1576 )

File ~\anaconda3\lib\site-packages\plotly\express\_core.py:1945, in make_figure(args, constructor, trace_patch, layout_patch)
   1942 layout_patch = layout_patch or {}
   1943 apply_default_cascade(args)
-> 1945 args = build_dataframe(args, constructor)
   1946 if constructor in [go.Treemap, go.Sunburst, go.Icicle] and args["path"] is not None:
   1947     args = process_dataframe_hierarchy(args)

File ~\anaconda3\lib\site-packages\plotly\express\_core.py:1405, in build_dataframe(args, constructor)
   1402     args["color"] = None
   1403 # now that things have been prepped, we do the systematic rewriting of `args`
-> 1405 df_output, wide_id_vars = process_args_into_dataframe(
   1406     args, wide_mode, var_name, value_name
   1407 )
   1409 # now that `df_output` exists and `args` contains only references, we complete
   1410 # the special-case and wide-mode handling by further rewriting args and/or mutating
   1411 # df_output
   1413 count_name = _escape_col_name(df_output, "count", [var_name, value_name])

File ~\anaconda3\lib\site-packages\plotly\express\_core.py:1207, in process_args_into_dataframe(args, wide_mode, var_name, value_name)
   1205         if argument == "index":
   1206             err_msg += "\n To use the index, pass it in directly as `df.index`."
-> 1207         raise ValueError(err_msg)
   1208 elif length and len(df_input[argument]) != length:
   1209     raise ValueError(
   1210         "All arguments should have the same length. "
   1211         "The length of column argument `df[%s]` is %d, whereas the "
   (...)
   1218         )
   1219     )

ValueError: Value of 'path_0' is not the name of a column in 'data_frame'. Expected one of ['index', 0] but received: home_team
In [89]:
px.treemap(match_data,path=['home_team','away_team'],values=match_data.home_goals)
C:\Users\Ibrahim_Refa3i\anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

C:\Users\Ibrahim_Refa3i\anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

HeatMap

In [90]:
px.density_heatmap(match_data,x='goals_total',title='Total Number of goals',text_auto=True,marginal_x='histogram')
In [92]:
merged=pd.merge(player_stats,player_shooting)

merg=merged.groupby('player').sum()[['games','assists']].reset_index()
In [93]:
top_10=merg.sort_values('assists',ascending=False).head(10)

pairplot for home_team and it's statistics

In [94]:
mat = match_data[['home_team','homeXG','home_goals','home_yellow_cards']]
In [95]:
sns.pairplot(mat)
Out[95]:
<seaborn.axisgrid.PairGrid at 0x195f95155b0>
In [ ]:
 
In [ ]: